# This is an R script to replicate the analyses from the article entitled 
# "A mating-induced reproductive gene promotes Anopheles tolerance to Plasmodium falciparum infection" 
# by Perrine Marcenac, W. Robert Shaw, Evdoxia G. Kakani, Sara N. Mitchell, Adam South, 
# Kristine Werling, Eryney Marrogi, Daniel G. Abernathy, Rakiswende Serge Yerbanga, Roch K. Dabire,
# Abdoulaye Diabate, Thierry Lefevre, and Flaminia Catteruccia (2020)
# Code developed by Thierry Lefevre: thierry.lefevre@ird.fr

library(emmeans)
library(glmmTMB)

####################################################################################################
### Fig 1A: analysis of the effect of mating and infection on the number of eggs in An. gambiae ####
####################################################################################################

###### load data
eggang <- read.table("figure1a_gambiae_eggs.txt",header=T,stringsAsFactors = TRUE)

### metadata: dataframe "eggang" has 5 columns:
# 	mosquito_id : a unique code for each dissected mosquito (n= 222)
# 	replicate : a 3-level categorical variable corresponding to each of the three experimental replicates
# 	mating : a binomial variable corresponding to mating status(0= unmated, 1= mated)
# 	infection : a binomial variable corresponding to infection status(0= uninfected, 1= infected)
#   eggs : number of eggs developed

eggang$infection <- as.factor(eggang$infection)
eggang$mating <- as.factor(eggang$mating)

hist(eggang$eggs)
shapiro.test(eggang$eggs) # normality check
fligner.test(eggang$eggs~eggang$mating) # homogeneity of variance between mated and unmated 
fligner.test(eggang$eggs~eggang$infection) # homogeneity of variance between infected and uninfected

###### Gaussian mixed-effect model of the number of developing eggs in relation to mating and infection in An. gambiae
mod1 <- glmmTMB(eggs~mating*infection + (1|replicate), data=eggang,family = gaussian(link= "identity"))
mod2 <- glmmTMB(eggs~mating+infection + (1|replicate), data=eggang,family = gaussian(link= "identity"))
anova(mod1,mod2) ## interaction term not significant
mod3 <- glmmTMB(eggs~mating + (1|replicate), data=eggang,family = gaussian(link= "identity"))
anova(mod2,mod3) ## infection not significant
mod4 <- glmmTMB(eggs~1 + (1|replicate), data=eggang,family = gaussian(link= "identity"))
anova(mod3,mod4) ## mating not significant

summary(emmeans(mod1, pairwise~mating+infection, type="response"), infer = TRUE) # multiple pairwise comparison

#####################################################################################################
### Fig 1B: analysis of the effect of mating and infection on the number of eggs in An. stephensi ###
#####################################################################################################

###### load data
eggans <- read.table("figure1b_stephensi_eggs.txt",header=T,stringsAsFactors = TRUE)

### metadata: dataframe "eggans" has 5 columns:
# 	mosquito_id : a unique code for each dissected mosquito (n= 293)
# 	replicate : a 3-level categorical variable corresponding to each of the three experimental replicates
# 	mating : a binomial variable corresponding to mating status(0= unmated, 1= mated)
# 	infection : a binomial variable corresponding to infection status(0= uninfected, 1= infected)
#   eggs : number of eggs developed

eggans$infection <- as.factor(eggans$infection)
eggans$mating <- as.factor(eggans$mating)

hist(eggans$eggs)
shapiro.test(eggans$eggs) # non-normal distribution (over representation of egg="0" but only for unmated females) 

eggans$egg_prev <- ifelse(eggans$eggs > 0 ,1,0) ## addition of a variable "egg_prev" to the dataframe "eggans". 
## "egg_prev" is a binomial variable (0: no egg, 1: egg number>0)

## Binomial mixed-effect model of egg prevalence in relation to mating and infection in An. stephensi
glmm_egg1 <- glmmTMB(egg_prev~mating*infection + (1|replicate), data=eggans, family=binomial)
glmm_egg2 <- glmmTMB(egg_prev~infection+mating+ (1|replicate), data=eggans,family=binomial)
anova(glmm_egg1,glmm_egg2) ## interaction term not significant
glmm_egg3<- glmmTMB(egg_prev~mating + (1|replicate), data=eggans,family=binomial)
anova(glmm_egg2,glmm_egg3) ## infection not significant
glmm_egg4<- glmmTMB(egg_prev~1 + (1|replicate), data=eggans,family=binomial)
anova(glmm_egg3,glmm_egg4) ## mating significant

eggans2<-subset(eggans,eggs>0) ## exclude egg=0 and analyze the effect of mating and infection on egg number
hist(eggans2$eggs)
## zero-truncated poisson vs gaussian mixed-effect model of the number of developing eggs in relation to mating and infection in An. stephensi ----
glmm_negg1 <- glmmTMB(eggs~infection*mating + (1|replicate), data=eggans2, family=truncated_poisson(link = "log") )## ZT poisson model
glmmg1<-glmmTMB(eggs~infection*mating + (1|replicate), data=eggans2) ## gaussian model
anova(glmm_negg1,glmmg1) # comparison between the ZT poisson and the gaussian model. The Gaussian model is better (AIC= 3271 vs 2577).

## gaussian mixed-effect model of number of developing eggs in relation to mating and infection in An. stephensi 
mod5 <- glmmTMB(eggs~infection*mating + (1|replicate), data=eggans2,family = gaussian(link= "identity"))
mod6 <- glmmTMB(eggs~infection+mating + (1|replicate), data=eggans2,family = gaussian(link= "identity"))
anova(mod5,mod6) ## interaction term not significant
mod7<- glmmTMB(eggs~mating + (1|replicate), data=eggans2,family = gaussian(link= "identity"))
anova(mod6,mod7) ## infection not significant
mod8<- glmmTMB(eggs~1 + (1|replicate), data=eggans2,family = gaussian(link= "identity"))
anova(mod7,mod8) ## mating significant

summary(emmeans(mod5, pairwise~mating+infection, type="response"), infer = TRUE) # multiple pairwise comparison


##################################################################################
### Fig 2A: analysis of the effect of mating on Pf infection in An. gambiae    ###
##################################################################################

###### load data
pfang <- read.table("figure2a_gambiae_oocysts.txt",header=T,stringsAsFactors = TRUE)

### metadata: dataframe "pfang" has 5 columns:
# 	mosquito_id : a unique code for each dissected mosquito (n= 218)
# 	replicate : a 3-level categorical variable corresponding to each of the three experimental replicates
# 	mating : a binomial variable corresponding to mating status(0= unmated, 1= mated)
# 	prevalence : a binomial variable corresponding to infection status(0= uninfected, 1= infected)
#   oocyst : number of developing oocyst in mosquito midgut

pfang$prevalence <- as.factor(pfang$prevalence)
pfang$mating <- as.factor(pfang$mating)

## Binomial mixed-effect model of oocyst prevalence in relation to mating in An. gambiae 
glmm_pf1 <- glmmTMB(prevalence~mating + (1|replicate), data=pfang, family=binomial)
glmm_pf2 <- glmmTMB(prevalence~1+ (1|replicate), data=pfang,family=binomial)
anova(glmm_pf1,glmm_pf2) ## mating not significant


pfang2<-subset(pfang,oocyst>0) ## exclude individuals with oocyst=0 and analyze the effect of mating on oocyst number

hist(pfang2$oocyst) ## Negative binomial distribution

## zero-truncated negative binomial mixed-effect model of number of developing oocysts in relation to mating in An. gambiae ----
glmm_nooc1 <- glmmTMB(oocyst~mating + (1|replicate), data=pfang2, family=truncated_nbinom2(link = "log"))
glmm_nooc2 <- glmmTMB(oocyst~1 + (1|replicate), data=pfang2, family=truncated_nbinom2(link = "log"))
anova(glmm_nooc1,glmm_nooc2) ## mating not significant

summary(emmeans(glmm_nooc1, pairwise~mating, type="response"), infer = TRUE) # multiple pairwise comparison


##################################################################################
### Fig 2B: analysis of the effect of mating on Pf infection in An. stephensi  ###
##################################################################################

###### load data
pfans <- read.table("figure2b_stephensi_oocysts.txt",header=T,stringsAsFactors = TRUE)
### metadata: dataframe "pfans" has 4 columns:
# 	mosquito_id : a unique code for each dissected mosquito (n= 218)
# 	replicate : a 3-level categorical variable corresponding to each of the three experimental replicates
# 	mating : a binomial variable corresponding to mating status(0= unmated, 1= mated)
#   oocysts : number of developing oocysts in mosquito midgut

pfans$mating <- as.factor(pfans$mating)
pfans$prevalence <- ifelse(pfans$oocysts > 0 ,1,0) ## addition of a new variable "prevalence" to the dataframe "pfans". 
## "prevalence" is a binomial variable (0: uninfected, 1: infected)

## Binomial mixed-effect model of oocyst prevalence in relation to mating in An. stephensi ----
glmm_pf3 <- glmmTMB(prevalence~mating + (1|replicate), data=pfans, family=binomial)
glmm_pf4 <- glmmTMB(prevalence~1+ (1|replicate), data=pfans,family=binomial)
anova(glmm_pf3,glmm_pf4) ## mating term not significant

pfans2<-subset(pfans,oocysts>0) ## exclude individuals with oocyst=0 and analyze the effect of mating on oocyst number
hist(pfans2$oocysts) ## Negative binomial distribution

## zero-truncated negative binomial mixed-effect model of number of developing oocysts in relation to mating in An. gambiae ----
glmm_nooc3 <- glmmTMB(oocysts~mating + (1|replicate), data=pfans2, family=truncated_nbinom2(link = "log"))
glmm_nooc4 <- glmmTMB(oocysts~1 + (1|replicate), data=pfans2, family=truncated_nbinom2(link = "log"))
anova(glmm_nooc3,glmm_nooc4) ## mating term not significant

summary(emmeans(glmm_nooc3, pairwise~mating, type="response"), infer = TRUE) # multiple pairwise comparison

############################################################################################################################
### Fig 3A: analysis of the effect of gametocytemia and miso-treatment on the number of developing eggs in An. coluzzii  ###
############################################################################################################################

###### load data
egganc <- read.table("figure3_field.txt",header=T,stringsAsFactors = TRUE)
summary(egganc)
### metadata: dataframe "egganc" has 9 columns:
# 	id : a unique code for each dissected mosquito (n= 224)
# 	replicate : a 3-level categorical variable corresponding to each of the three experimental replicates
# 	treatment : a 2-level categorical variable corresponding to gene silencing: Miso silenced (dsMISO) or unsilenced (dsControl)
#   gametocytemia : number of sexual parasite stage in the veinous blood of the gametocyte carrier volunteers
#   carrier: a six levels categorical variable corresponding to each gametocyte carrier aka parasite isolate
#   eggs: number of developing eggs
#   egg_prev: a binomial variable (0: no egg, 1: egg number>0)
#   oocyst: number of oocyst in mosquito midgut
#   prevalence: a binomial variable (0: uninfected, 1: infected)

egganc$egg_prev <- as.factor(egganc$egg_prev)
egganc$prevalence <- as.factor(egganc$prevalence) 
egganc$gametocytemia <- as.factor(egganc$gametocytemia)
hist(egganc$eggs) ## inflated Poisson distribution 

## Zero-inflated poisson  vs ZI negative binomial mixed-effect model of egg number in relation to gametocytemia and treatment in An. coluzzii
fit_zipoisson <- glmmTMB(eggs~gametocytemia*treatment+(1|replicate/carrier),data=egganc,ziformula=~1,family=poisson)
fit_zinbinom1<-glmmTMB(eggs~gametocytemia*treatment+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zipoisson,fit_zinbinom1) ## the ZI negative binomial model is better (AIC of 3854 vs 1721 for the ZI NB)
fit_zinbinom2<-glmmTMB(eggs~gametocytemia+treatment+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom1,fit_zinbinom2) ## significant interaction
fit_zinbinom3<-glmmTMB(eggs~gametocytemia+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom2,fit_zinbinom3) ## treatment not significant
fit_zinbinom4<-glmmTMB(eggs~1+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom3,fit_zinbinom4) ## gametocytemia not significant

# pairwise comparison in the control group
egganc_ctrl<-subset(egganc,treatment=="dsControl")
fit_zinbinom3<-glmmTMB(eggs~gametocytemia+(1|replicate/carrier),data=egganc_ctrl,family=nbinom1)
summary(emmeans(fit_zinbinom3, pairwise~gametocytemia, type="response"), infer = TRUE) # multiple pairwise comparison

# pairwise comparison in the Miso
egganc_miso<-subset(egganc,treatment=="dsMISO")
fit_zinbinom4<-glmmTMB(eggs~gametocytemia+(1|replicate/carrier),data=egganc_miso,family=nbinom1)
summary(emmeans(fit_zinbinom4, pairwise~gametocytemia, type="response"), infer = TRUE) # multiple pairwise comparison

###############################################################################################################################
### Fig 3B: analysis of the effect of gametocytemia and miso treatment on the number of developing oocysts in An. coluzzii  ###
###############################################################################################################################

## Binomial mixed-effect model of oocyst prevalence in relation to gametocytemia and treatment in An. coluzzii ----
glmm_pf5 <- glmmTMB(prevalence~gametocytemia*treatment + (1|replicate/carrier), data=egganc, family=binomial)
glmm_pf6 <- glmmTMB(prevalence~gametocytemia+treatment+ (1|replicate/carrier), data=egganc,family=binomial)
anova(glmm_pf5,glmm_pf6) ## interaction term not significant
glmm_pf7 <- glmmTMB(prevalence~gametocytemia+ (1|replicate/carrier), data=egganc,family=binomial)
anova(glmm_pf6,glmm_pf7) ## treatment term not significant
glmm_pf8 <- glmmTMB(prevalence~1+ (1|replicate/carrier), data=egganc,family=binomial)
anova(glmm_pf7,glmm_pf8) ## gametocytemia term not significant

egganc2<-subset(egganc,oocyst>0) ## exclude individuals with oocyst=0
## this removed 44 individuals
hist(egganc2$oocyst) ## Negative binomial distribution

## zero-truncated negative binomial mixed-effect model of number of developing oocysts in relation to gametiocytemia and treatment in An. coluzzii
glmm_nooc5 <- glmmTMB(oocyst~gametocytemia*treatment + (1|replicate/carrier), data=egganc2, family=truncated_nbinom2(link = "log"))
glmm_nooc6 <- glmmTMB(oocyst~gametocytemia+treatment + (1|replicate/carrier), data=egganc2, family=truncated_nbinom2(link = "log"))
anova(glmm_nooc5,glmm_nooc6) #interaction term not significant
glmm_nooc7 <- glmmTMB(oocyst~gametocytemia+ (1|replicate/carrier), data=egganc2, family=truncated_nbinom2(link = "log"))
anova(glmm_nooc6,glmm_nooc7) # treatment not significant
glmm_nooc8 <- glmmTMB(oocyst~1+ (1|replicate/carrier), data=egganc2, family=truncated_nbinom2(link = "log"))
anova(glmm_nooc7,glmm_nooc8) # gametocytemia significant

# pairwise comparisons in the control 
egganc_ctrl2<-subset(egganc2,treatment=="dsControl")
glmm_nooc9<-glmmTMB(oocyst~gametocytemia+(1|replicate/carrier),data=egganc_ctrl2,family=truncated_nbinom2(link = "log"))
summary(emmeans(glmm_nooc9, pairwise~gametocytemia, type="response"), infer = TRUE) # multiple pairwise comparison

# pairwise comparison in the Miso
egganc_miso2<-subset(egganc2,treatment=="dsMISO")
glmm_nooc10<-glmmTMB(oocyst~gametocytemia+(1|replicate/carrier),data=egganc_miso2,family=truncated_nbinom2(link = "log"))
summary(emmeans(glmm_nooc10, pairwise~gametocytemia, type="response"), infer = TRUE)

###############################################################################################################################
### Fig 3C: analysis of the effect of miso treatment and oocyst intensity on the number of developing eggs in An. coluzzii  ###
###############################################################################################################################

## Zero-inflated negative binomial mixed-effect model of egg number in relation to oocyst intensity and treatment in An. coluzzii 

fit_zinbinom5<-glmmTMB(eggs~oocyst*treatment+(1|replicate/carrier),data=egganc,family=nbinom1)
fit_zinbinom6<-glmmTMB(eggs~oocyst+treatment+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom5,fit_zinbinom6) ## significant interaction
fit_zinbinom7<-glmmTMB(eggs~oocyst+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom6,fit_zinbinom7) ## treatment main effect not significant
fit_zinbinom8<-glmmTMB(eggs~1+(1|replicate/carrier),data=egganc,family=nbinom1)
anova(fit_zinbinom7,fit_zinbinom8) ## oocyst main effect not significant

####################################################################################################################
### Fig 3D: analysis of the effect of miso treatment and oocyst intensity on the egg prevalence in An. coluzzii  ###
####################################################################################################################

## binomial mixed-effect model of egg prevalence in relation to oocyst intensity and treatment in An. coluzzii ----

glmm_pf9<-glmmTMB(egg_prev~oocyst*treatment+(1|replicate/carrier),data=egganc,family=binomial)
glmm_pf10<-glmmTMB(egg_prev~oocyst+treatment+(1|replicate/carrier),data=egganc,family=binomial)
anova(glmm_pf9,glmm_pf10) ## significant interaction
glmm_pf11<-glmmTMB(egg_prev~oocyst+(1|replicate/carrier),data=egganc,family=binomial)
anova(glmm_pf10,glmm_pf11) ## treatment main effect not significant
glmm_pf12<-glmmTMB(egg_prev~1+(1|replicate/carrier),data=egganc,family=binomial)
anova(glmm_pf11,glmm_pf12) ##oocyst main effect not significant

## estimation of odds ratio for the miso group
## miso group
egganc_miso<-subset(egganc,treatment=="dsMISO")
glm_miso<-glm(egg_prev~oocyst,binomial,data=egganc_miso)
library(questionr)
odds.ratio(glm_miso, level=0.95)
##### END -----


